# Start from an empty global environment.
# NOTE(review): this removes every object in the calling session; kept because
# the script was written to be run on its own.
rm(list = ls())

# Set the working directory to the folder that contains this script (the raw
# data files are read relative to it). The script location is only available
# through RStudio, so when RStudio is not running or the document has not been
# saved the current working directory is kept instead of aborting.
local({
  in_rstudio <- requireNamespace("rstudioapi", quietly = TRUE) &&
    rstudioapi::isAvailable()
  script_path <- if (in_rstudio) {
    rstudioapi::getActiveDocumentContext()$path
  } else {
    ""
  }
  if (nzchar(script_path)) {
    setwd(dirname(script_path))
  } else {
    message("Script path not available; keeping the current working directory.")
  }
})

# Check that the working directory is correct.
getwd()

# Load all the packages needed for the analysis. pacman is only called through
# `pacman::`, so it has to be installed but does not need to be attached.
if (!requireNamespace("pacman", quietly = TRUE)) {
  install.packages("pacman")
}
pacman::p_load(
  tidyr, dplyr, lmerTest, stargazer,
  mfx, ggplot2, ggpubr, reshape2
)

# Read the raw survey export (path is relative to the working directory).
# stringsAsFactors = FALSE keeps text columns as character on every R version;
# R < 4.0 defaults to factors, and the string recodes further down would then
# return factor codes instead of the original text.
df <- read.csv(
  file = "RawData/Evasion study Receiver Final - Experiment 3_May 4, 2022_07.17.csv",
  header = TRUE,
  sep = ",",
  stringsAsFactors = FALSE
)

# Remove the additional header rows generated by Qualtrics (they arrive as the
# first two data rows).
df <- df %>% filter(!(row_number() %in% c(1, 2)))

# Table whose Prolific_id column lists the participants to exclude
# (judging by the file name, people who participated twice).
to_exclude <- read.csv(
  file = "Experiment3_participated_twice.csv",
  header = TRUE,
  sep = ",",
  stringsAsFactors = FALSE
)

n <- nrow(df) # author's recorded count: N = 1229

# Give the Qualtrics question columns descriptive names (new name = old name),
# number the rows as sub_id, then drop every participant whose Prolific ID is
# listed in to_exclude. sub_id is assigned before the exclusion, so excluded
# rows leave gaps in the numbering.
df <- df %>%
  dplyr::rename(dplyr::all_of(c(
    Prolific_id = "Q37",
    Duration = "Duration..in.seconds.",
    Control_q1 = "Q12",
    errors_q1 = "Q13",
    Control_q2 = "Q14",
    errors_q2 = "Q15",
    Control_q3 = "Q16",
    errors_q3 = "Q17",
    Control_q4 = "Q18",
    errors_q4 = "Q19",
    Control_q5 = "Q20",
    errors_q5 = "Q21",
    decision_deceptive_m = "Q48",
    decision_nondeceptive_m = "Q46",
    decision_blue_m = "Q127",
    b_S_deceptive = "Q39",
    b_R_guess_red_after_deceptive = "Q41",
    explanation_guess = "Q36",
    gender = "Q33",
    gender_other = "Q33_3_TEXT",
    age = "Q34_1_TEXT",
    education = "Q35",
    education_other = "Q35_7_TEXT",
    condition = "Name"
  ))) %>%
  dplyr::mutate(sub_id = row_number()) %>%
  filter(!(Prolific_id %in% to_exclude$Prolific_id))


# Count repeated Prolific IDs, then keep only the first row for each ID.
summary(duplicated(df$Prolific_id)) # author's note: TRUE = 1
df <- df[which(!duplicated(df$Prolific_id)), , drop = FALSE]
# author's note: N = 1201

# Tabulate the completion flag, then keep finished responses that did not come
# through the "preview" distribution channel. which() drops rows where the
# condition is NA.
table(df$Finished) # author's note: FALSE = 0
df <- df[
  which(df$Finished == "True" & df$DistributionChannel != "preview"),
  ,
  drop = FALSE
]
# author's note: N = 1201

# Keep only the variables used from here on, in this column order.
df <- df %>%
  dplyr::select(dplyr::all_of(c(
    "Duration", "ResponseId",
    "Control_q1", "errors_q1",
    "Control_q2", "errors_q2",
    "Control_q3", "errors_q3",
    "Control_q4", "errors_q4",
    "Control_q5", "errors_q5",
    "decision_deceptive_m", "decision_nondeceptive_m", "decision_blue_m",
    "b_S_deceptive", "b_R_guess_red_after_deceptive",
    "gender", "gender_other", "age", "education", "education_other",
    "explanation_guess",
    "condition", "Prolific_id", "X", "Z", "sub_id"
  )))


#------------------------------
# CREATE AND EDIT VARS
#------------------------------

# Build the treatment variables in one pass:
#   treat      - short code for each condition label
#   treat_pool - the three control codes collapsed into "control"; treatment
#                codes are kept as they are
# then sort the rows by condition and drop unused factor levels.
evasion_S3 <- df %>%
  mutate(
    treat = case_when(
      is.na(condition) ~ NA_character_,
      condition == "Treatment I don't know" ~ "t_idk",
      condition == "Treatment silence" ~ "t_s",
      condition == "Treatment Half-truth" ~ "t_ht",
      condition == "Control silence" ~ "c_s",
      condition == "Control Half-truth" ~ "c_ht",
      # NOTE(review): every other non-missing label is coded "c_idk", so an
      # unexpected or misspelled condition would land here silently.
      TRUE ~ "c_idk"
    ),
    treat_pool = ifelse(
      treat %in% c("c_s", "c_ht", "c_idk"),
      "control",
      treat
    )
  ) %>%
  arrange(condition) %>%
  droplevels()

# Derive the analysis variables:
#   age_clean      - age with "-99"/empty set to NA and the free-text answers
#                    listed below mapped to digits
#   female         - 1 for "Female", NA for "I would prefer not to answer",
#                    0 for every other answer
#   educ_recode    - education grouped into "high" / "medium_low" / "other"
#                    (NA for "I would prefer not to answer")
#   educ_high      - 1 when educ_recode is "high", 0 otherwise
#   treat_pool_bin - "direct_lie" for the pooled control, "evasion" otherwise
#   choose_red_*   - 1 when the corresponding decision is "RED", 0 otherwise
# Missing inputs stay NA throughout.
df_S3 <- evasion_S3 %>%
  mutate(
    age_clean = case_when(
      age %in% c("-99", "") ~ NA_character_,
      age == "32 years" ~ "32",
      age == "fifty" ~ "50",
      age == "Fifty four" ~ "54",
      age == "fiftyseven" ~ "57",
      TRUE ~ age
    ),
    female = case_when(
      is.na(gender) ~ NA_real_,
      gender == "Female" ~ 1,
      gender == "I would prefer not to answer" ~ NA_real_,
      TRUE ~ 0
    )
  ) %>%
  mutate(
    educ_recode = case_when(
      education %in% c(
        "Undergraduate University degree",
        "Masters degree",
        "Doctoral or professional degree (JD, MD, PhD)",
        "College or 6th form"
      ) ~ "high",
      education %in% c("Less than secondary school", "Secondary school") ~
        "medium_low",
      is.na(education) | education == "I would prefer not to answer" ~
        NA_character_,
      TRUE ~ "other"
    )
  ) %>%
  mutate(
    educ_high = as.numeric(educ_recode == "high"),
    treat_pool_bin = ifelse(treat_pool != "control", "evasion", "direct_lie"),
    choose_red_deceptive_m = as.numeric(decision_deceptive_m == "RED"),
    choose_red_nondeceptive_m = as.numeric(decision_nondeceptive_m == "RED"),
    choose_red_blue_m = as.numeric(decision_blue_m == "RED")
  ) %>%
  # Convert the cleaned age and the two b_* columns from text to numbers;
  # anything that is not a number becomes NA (with a coercion warning).
  mutate(
    age_clean = as.numeric(age_clean),
    b_S_deceptive = as.numeric(b_S_deceptive),
    b_R_guess_red_after_deceptive = as.numeric(b_R_guess_red_after_deceptive)
  )

# -------- Write output data to file ----------

# R format. The file name is fixed (no date stamp), so rerunning the script
# overwrites the previous export.
filename <- "Data-Evasion-S3.Rdata"
save(list = "df_S3", file = filename)

# .csv format. Row names are written as the first, unnamed column
# (write.csv's default, spelled out here).
write.csv(x = df_S3, file = "Data-Evasion-S3.csv", row.names = TRUE)
